Purpose:
Runs survival analysis models using splicing cluster assignment and 1) single exon splicing burden index (SBI) or 2) KEGG Spliceosome GSVA scores as a predictor
Uses a wrapper function (survival_analysis) from the util
folder.
Load packages, set directory paths and call setup script
library(tidyverse)
library(survival)
library(ggpubr)
library(ggplot2)
library(patchwork)
# Locate the repository root (the directory that contains `.git`)
root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))

# Directories used throughout this analysis
data_dir <- file.path(root_dir, "data")
analysis_dir <- file.path(root_dir, "analyses", "survival")
input_dir <- file.path(analysis_dir, "results")
results_dir <- file.path(analysis_dir, "results")
plot_dir <- file.path(analysis_dir, "plots")

# Create the output directories if they do not exist yet. `plot_dir` is
# included because every ggsave() call in this script writes into it.
for (out_dir in c(results_dir, plot_dir)) {
  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE)
  }
}

# Load the survival helper script. This file calls survival_analysis(),
# fit_save_model(), plotForest() and plotKM(), none of which are defined here.
source(file.path(analysis_dir, "util", "survival_models.R"))
Set metadata and cluster assignment file paths
# Splicing indices joined with survival data (read from this module's results)
metadata_file <- file.path(input_dir, "splicing_indices_with_survival.tsv")

# Both remaining inputs live in the sample-psi-clustering results directory
clustering_results_dir <- file.path(
  root_dir, "analyses", "sample-psi-clustering", "results"
)

# Cluster assignments from the top-5000-events stranded clustering
cluster_file <- file.path(
  clustering_results_dir,
  "sample-cluster-metadata-top-5000-events-stranded.tsv"
)

# GSVA scores for the stranded samples
kegg_scores_stranded_file <- file.path(
  clustering_results_dir,
  "gsva_output_stranded.tsv"
)
Wrangle data: add cluster assignment and spliceosome GSVA scores to
metadata and define column lgg_group (LGG or
non-LGG)
# Sample-level splicing indices with survival annotations
metadata <- readr::read_tsv(file = metadata_file)
Rows: 684 Columns: 26
── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (11): Kids_First_Biospecimen_ID, Histology, Kids_First_Participant_ID, molecular_subtype, extent_of_tumor_resection, EFS_event_type, OS_...
dbl (15): Total, AS_neg, AS_pos, AS_total, SI_A3SS, SI_A5SS, SI_RI, SI_SE, SI_Total, EFS_days, OS_days, age_at_diagnosis_days, age_at_diagno...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Cluster assignments; the sample column is renamed so that it matches the
# biospecimen identifier column used in the metadata
clusters <- dplyr::rename(
  read_tsv(cluster_file),
  Kids_First_Biospecimen_ID = sample_id
)
Rows: 729 Columns: 8
── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (6): sample_id, plot_group, plot_group_hex, RNA_library, molecular_subtype, plot_group_n
dbl (2): cluster, group_n
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# GSVA scores, restricted to the KEGG spliceosome gene set; the score column
# is given a descriptive name before it is joined onto the metadata
gsva_scores <- read_tsv(kegg_scores_stranded_file)
gsva_scores <- dplyr::filter(gsva_scores, geneset == "KEGG_SPLICEOSOME")
gsva_scores <- dplyr::rename(gsva_scores, spliceosome_gsva_score = score)
Rows: 22599 Columns: 3
── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): sample_id, geneset
dbl (1): score
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# how many clusters?
cluster_ids <- sort(unique(clusters$cluster))
n_clust <- length(cluster_ids)

metadata <- metadata %>%
  # Keep every clustered sample. The join key is stated explicitly rather
  # than relying on dplyr picking the shared column names.
  right_join(
    clusters %>% dplyr::select(Kids_First_Biospecimen_ID, cluster),
    by = "Kids_First_Biospecimen_ID"
  ) %>%
  # Add the KEGG spliceosome GSVA score for each sample
  left_join(
    gsva_scores %>% dplyr::select(sample_id, spliceosome_gsva_score),
    by = c("Kids_First_Biospecimen_ID" = "sample_id")
  ) %>%
  # Label clusters as "Cluster N" and order the levels numerically. The levels
  # come from the cluster ids actually present, so non-contiguous ids do not
  # produce unknown-level warnings.
  dplyr::mutate(cluster = glue::glue("Cluster {cluster}")) %>%
  dplyr::mutate(cluster = fct_relevel(cluster,
                                      paste0("Cluster ", cluster_ids))) %>%
  # Two-level histology indicator: LGG vs everything else
  dplyr::mutate(lgg_group = case_when(
    plot_group == "Low-grade glioma" ~ "LGG",
    TRUE ~ "non-LGG"
  )) %>%
  # SBI is SI_Total multiplied by 10; age is converted from days to years
  dplyr::mutate(SBI = SI_Total * 10) %>%
  dplyr::mutate(age_at_diagnosis_years = age_at_diagnosis_days / 365.25)
Joining with `by = join_by(Kids_First_Biospecimen_ID)`
Generate coxph models including extent of tumor resection, lgg group, cluster assignment, age at diagnosis, and SBI as covariates
# Additive Cox model for overall survival with SBI as the splicing covariate.
# Samples whose extent of resection is "Not Reported" or "Unavailable" are
# left out of the model input.
os_sbi_rows <- !metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable")
os_sbi_rds <- file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_SBI.RDS")

add_model_os <- fit_save_model(
  metadata[os_sbi_rows, ],
  terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+SBI",
  os_sbi_rds,
  "multivariate",
  years_col = "OS_years",
  status_col = "OS_status"
)

# Read the model back from the RDS path given above and build its forest plot
forest_os <- plotForest(readRDS(os_sbi_rds))
`height` was translated to `width`.
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
# Show the OS forest plot, then write it to the plots directory as a PDF
forest_os
ggsave(
  filename = file.path(plot_dir, "forest_add_OS_resection_lgg_group_cluster_assignment_SBI.pdf"),
  plot = forest_os,
  device = "pdf",
  width = 10,
  height = 6,
  units = "in"
)
# Additive Cox model for event-free survival with SBI as the splicing
# covariate, excluding samples with "Not Reported"/"Unavailable" resection.
efs_sbi_rows <- !metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable")
efs_sbi_rds <- file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_SBI.RDS")

add_model_efs <- fit_save_model(
  metadata[efs_sbi_rows, ],
  terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+SBI",
  efs_sbi_rds,
  "multivariate",
  years_col = "EFS_years",
  status_col = "EFS_status"
)

# Read the model back from the RDS path given above and build its forest plot
forest_efs <- plotForest(readRDS(efs_sbi_rds))
`height` was translated to `width`.
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
# Show the EFS forest plot, then write it to the plots directory as a PDF
forest_efs
ggsave(
  filename = file.path(plot_dir, "forest_add_EFS_resection_lgg_group_cluster_assignment_SBI.pdf"),
  plot = forest_efs,
  device = "pdf",
  width = 10,
  height = 6,
  units = "in"
)
repeat analysis, replacing SBI with KEGG spliceosome gsva score
# Same additive OS model, with the KEGG spliceosome GSVA score in place of SBI
os_gsva_rows <- !metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable")
os_gsva_rds <- file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_spliceosome_score.RDS")

add_model_os <- fit_save_model(
  metadata[os_gsva_rows, ],
  terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+spliceosome_gsva_score",
  os_gsva_rds,
  "multivariate",
  years_col = "OS_years",
  status_col = "OS_status"
)

# Read the model back from the RDS path given above and build its forest plot
forest_os <- plotForest(readRDS(os_gsva_rds))
`height` was translated to `width`.
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
# Show the OS forest plot (spliceosome score model) and save it as a PDF
forest_os
ggsave(
  filename = file.path(plot_dir, "forest_add_OS_resection_lgg_group_cluster_assignment_spliceosome_score.pdf"),
  plot = forest_os,
  device = "pdf",
  width = 10,
  height = 6,
  units = "in"
)
# Fit cluster-by-predictor interaction Cox models (EFS and OS) for each
# splicing predictor, and save a forest plot for every fit.
models <- c("spliceosome_gsva_score", "SBI")

# The resection filter is identical for every fit, so build the subset once
int_model_input <- metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"), ]

for (each in models) {
  # `cluster*<predictor>` expands to both main effects plus their interaction
  int_terms <- paste0("extent_of_tumor_resection+lgg_group+cluster*", each, "+age_at_diagnosis_years")

  # Event-free survival
  int_efs_rds <- file.path(results_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS"))
  int_model_efs <- fit_save_model(int_model_input,
                                  terms = int_terms,
                                  int_efs_rds,
                                  "multivariate",
                                  years_col = "EFS_years",
                                  status_col = "EFS_status")
  int_forest_efs <- plotForest(readRDS(int_efs_rds))
  # Values are not auto-printed inside a for loop, so print explicitly
  print(int_forest_efs)
  ggsave(file.path(plot_dir, paste0("forest_int_EFS_resection_lgg_group_cluster_assignment_", each, ".pdf")),
         int_forest_efs,
         width = 10, height = 6, units = "in",
         device = "pdf")

  # Overall survival
  int_os_rds <- file.path(results_dir, paste0("cox_OS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS"))
  int_model_os <- fit_save_model(int_model_input,
                                 terms = int_terms,
                                 int_os_rds,
                                 "multivariate",
                                 years_col = "OS_years",
                                 status_col = "OS_status")
  int_forest_os <- plotForest(readRDS(int_os_rds))
  print(int_forest_os)
  ggsave(file.path(plot_dir, paste0("forest_int_OS_resection_lgg_group_cluster_assignment_", each, ".pdf")),
         int_forest_os,
         width = 10, height = 6, units = "in",
         device = "pdf")
}
`height` was translated to `width`.
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
`height` was translated to `width`.
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
`height` was translated to `width`.
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
`height` was translated to `width`.
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
# Additive EFS model with the KEGG spliceosome GSVA score in place of SBI
efs_gsva_rows <- !metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable")
efs_gsva_rds <- file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_spliceosome_score.RDS")

add_model_efs <- fit_save_model(
  metadata[efs_gsva_rows, ],
  terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+spliceosome_gsva_score",
  efs_gsva_rds,
  "multivariate",
  years_col = "EFS_years",
  status_col = "EFS_status"
)

# Read the model back from the RDS path given above and build its forest plot
forest_efs <- plotForest(readRDS(efs_gsva_rds))
`height` was translated to `width`.
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
# Show the EFS forest plot (spliceosome score model) and save it as a PDF
forest_efs
ggsave(
  filename = file.path(plot_dir, "forest_add_EFS_resection_lgg_group_cluster_assignment_spliceosome_score.pdf"),
  plot = forest_efs,
  device = "pdf",
  width = 10,
  height = 6,
  units = "in"
)
Subset metadata for LGG, and only include clusters with
>= 10 samples
# Low-grade glioma samples only; "Wildtype" is moved to the first level of
# mol_sub_group
lgg <- metadata %>%
  dplyr::filter(plot_group == "Low-grade glioma") %>%
  dplyr::mutate(
    cluster = factor(cluster),
    mol_sub_group = fct_relevel(mol_sub_group, "Wildtype", after = 0)
  )

# Clusters that contain at least 10 LGG samples
lgg_cluster_sizes <- dplyr::count(lgg, cluster)
retain_clusters_lgg <- lgg_cluster_sizes %>%
  dplyr::filter(n >= 10) %>%
  dplyr::pull(cluster)

# Keep only those clusters and rebuild the factor so that the levels of the
# removed clusters are dropped
lgg <- lgg %>%
  dplyr::filter(cluster %in% retain_clusters_lgg) %>%
  dplyr::mutate(cluster = factor(cluster))
Generate coxph models including covariates
extent_of_tumor_resection, mol_sub_group,
cluster, age_at_diagnosis_years, and SBI and plot
# identify LGG clusters
# Cluster labels have the form "Cluster N", so the prefix to strip is
# "Cluster " (capital C, trailing space). Stripping lowercase "cluster" leaves
# the label untouched and as.integer() then returns NA for every row.
# NOTE(review): the result is a vector of integers, whereas `lgg$cluster`
# holds "Cluster N" labels; comparing the two with %in% never matches.
lgg_clusters <- metadata %>%
  filter(lgg_group == "LGG") %>%
  mutate(cluster = as.integer(gsub("Cluster ", "", cluster, fixed = TRUE))) %>%
  pull(cluster) %>%
  sort() %>%
  unique()
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `cluster = as.integer(gsub("cluster", "", cluster))`.
Caused by warning:
! NAs introduced by coercion
# Additive EFS Cox model within LGG, with SBI as the splicing covariate.
# NOTE(review): `lgg$cluster` holds labels of the form "Cluster N" while
# `lgg_clusters` is built with as.integer(), so the `%in%` below cannot match
# and the cluster condition keeps every row. Confirm whether a cluster
# exclusion was intended here.
lgg_sbi_rows <- !lgg$cluster %in% lgg_clusters &
  !lgg$extent_of_tumor_resection %in% c("Not Reported", "Unavailable")

add_model_lgg_efs <- fit_save_model(
  lgg[lgg_sbi_rows, ],
  terms = "extent_of_tumor_resection+mol_sub_group+cluster+age_at_diagnosis_years+SBI",
  file.path(results_dir, "cox_lgg_EFS_additive_terms_resection_subtype_cluster_SBI.RDS"),
  "multivariate",
  years_col = "EFS_years",
  status_col = "EFS_status"
)
Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Loglik converged before variable 6,7 ; coefficient may be infinite.
# Read the LGG EFS (SBI) model from disk and build its forest plot
lgg_efs_sbi_fit <- readRDS(
  file.path(results_dir, "cox_lgg_EFS_additive_terms_resection_subtype_cluster_SBI.RDS")
)
forest_lgg_efs <- plotForest(lgg_efs_sbi_fit)
`height` was translated to `width`.
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
# Show the LGG EFS forest plot (SBI model) and save it as a PDF
forest_lgg_efs
ggsave(
  filename = file.path(plot_dir, "forest_add_EFS_LGG_resection_subtype_cluster_assignment_SBI.pdf"),
  plot = forest_lgg_efs,
  device = "pdf",
  width = 10,
  height = 6,
  units = "in"
)
repeat analysis replacing SBI with
spliceosome_gsva_score
# Additive EFS Cox model within LGG, with the spliceosome GSVA score in place
# of SBI.
# NOTE(review): `lgg$cluster` holds labels of the form "Cluster N" while
# `lgg_clusters` is built with as.integer(), so the `%in%` below cannot match
# and the cluster condition keeps every row. Confirm whether a cluster
# exclusion was intended here.
lgg_gsva_rows <- !lgg$cluster %in% lgg_clusters &
  !lgg$extent_of_tumor_resection %in% c("Not Reported", "Unavailable")

add_model_lgg_efs <- fit_save_model(
  lgg[lgg_gsva_rows, ],
  terms = "extent_of_tumor_resection+mol_sub_group+cluster+age_at_diagnosis_years+spliceosome_gsva_score",
  file.path(results_dir, "cox_lgg_EFS_additive_terms_resection_subtype_cluster_spliceosome_score.RDS"),
  "multivariate",
  years_col = "EFS_years",
  status_col = "EFS_status"
)
Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Loglik converged before variable 6,7 ; coefficient may be infinite.
# Read the LGG EFS (spliceosome score) model from disk and build its forest plot
lgg_efs_gsva_fit <- readRDS(
  file.path(results_dir, "cox_lgg_EFS_additive_terms_resection_subtype_cluster_spliceosome_score.RDS")
)
forest_lgg_efs <- plotForest(lgg_efs_gsva_fit)
`height` was translated to `width`.
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
# Show the LGG EFS forest plot (spliceosome score model) and save it as a PDF
forest_lgg_efs
ggsave(
  filename = file.path(plot_dir, "forest_add_EFS_LGG_resection_subtype_cluster_assignment_spliceosome_score.pdf"),
  plot = forest_lgg_efs,
  device = "pdf",
  width = 10,
  height = 6,
  units = "in"
)
Subset metadata for HGG and retain cluster with n >=
10
# High-grade glioma samples (other HGG and diffuse midline glioma) that have at
# least one survival time recorded; "HGG, H3 wildtype" is moved to the first
# level of mol_sub_group
hgg <- metadata %>%
  dplyr::filter(plot_group %in% c("Other high-grade glioma", "Diffuse midline glioma")) %>%
  dplyr::mutate(cluster = factor(cluster)) %>%
  dplyr::mutate(mol_sub_group = fct_relevel(mol_sub_group, "HGG, H3 wildtype", after = 0)) %>%
  dplyr::filter(!is.na(OS_days) | !is.na(EFS_days))

# Clusters that contain at least 10 HGG samples
retain_clusters_hgg <- hgg %>%
  dplyr::count(cluster) %>%
  filter(n >= 10) %>%
  pull(cluster)

hgg <- hgg %>%
  filter(cluster %in% retain_clusters_hgg) %>%
  dplyr::mutate(cluster = factor(cluster))

# Quartile cut-offs within the retained HGG samples, computed once. These are
# the same values that summary() reports as "1st Qu.", "Median" and "3rd Qu.".
hgg_sbi_q <- quantile(hgg$SBI, probs = c(0.25, 0.75),
                      na.rm = TRUE, names = FALSE)
hgg_gsva_q <- quantile(hgg$spliceosome_gsva_score, probs = c(0.25, 0.5, 0.75),
                       na.rm = TRUE, names = FALSE)

hgg <- hgg %>%
  # SBI: only the top and bottom quartiles are labelled; the middle is NA
  dplyr::mutate(SI_group = case_when(
    SBI > hgg_sbi_q[2] ~ "High SBI",
    SBI < hgg_sbi_q[1] ~ "Low SBI",
    TRUE ~ NA_character_
  )) %>%
  # Spliceosome GSVA score: one label per quartile. A missing score stays NA
  # instead of falling through to the 1st-quartile label, so the later
  # `!is.na(spliceosome_group)` filters can actually remove such samples.
  dplyr::mutate(spliceosome_group = case_when(
    is.na(spliceosome_gsva_score) ~ NA_character_,
    spliceosome_gsva_score > hgg_gsva_q[3] ~ "Splice GSVA 4th Q",
    spliceosome_gsva_score > hgg_gsva_q[2] ~ "Splice GSVA 3rd Q",
    spliceosome_gsva_score > hgg_gsva_q[1] ~ "Splice GSVA 2nd Q",
    TRUE ~ "Splice GSVA 1st Q"
  )) %>%
  dplyr::mutate(SI_group = fct_relevel(SI_group,
                                       c("High SBI", "Low SBI"))) %>%
  dplyr::mutate(spliceosome_group = fct_relevel(spliceosome_group,
                                                c("Splice GSVA 1st Q",
                                                  "Splice GSVA 2nd Q",
                                                  "Splice GSVA 3rd Q",
                                                  "Splice GSVA 4th Q")))
Generate HGG KM models with spliceosome_group as
covariate
# Kaplan-Meier model of overall survival in HGG by spliceosome GSVA quartile;
# rows without a quartile label are removed first
hgg_os_input <- dplyr::filter(hgg, !is.na(spliceosome_group))

hgg_kap_os <- survival_analysis(
  metadata = hgg_os_input,
  ind_var = "spliceosome_group",
  test = "kap.meier",
  metadata_sample_col = "Kids_First_Biospecimen_ID",
  days_col = "OS_days",
  status_col = "OS_status"
)
Testing model: survival::Surv(OS_days, OS_status) ~ spliceosome_group with kap.meier
# Save the HGG overall-survival Kaplan-Meier result
readr::write_rds(
  x = hgg_kap_os,
  file = file.path(results_dir, "logrank_hgg_OS_splice_group.RDS")
)
# Kaplan-Meier model of event-free survival in HGG by spliceosome GSVA
# quartile; rows without a quartile label are removed first
hgg_efs_input <- dplyr::filter(hgg, !is.na(spliceosome_group))

hgg_kap_efs <- survival_analysis(
  metadata = hgg_efs_input,
  ind_var = "spliceosome_group",
  test = "kap.meier",
  metadata_sample_col = "Kids_First_Biospecimen_ID",
  days_col = "EFS_days",
  status_col = "EFS_status"
)
Testing model: survival::Surv(EFS_days, EFS_status) ~ spliceosome_group with kap.meier
# Save the HGG event-free-survival Kaplan-Meier result
readr::write_rds(
  x = hgg_kap_efs,
  file = file.path(results_dir, "logrank_hgg_EFS_splice_group.RDS")
)
Generate KM plots
# KM plot of HGG overall survival by spliceosome GSVA quartile.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
km_hgg_os_plot <- plotKM(model = hgg_kap_os,
                         variable = "spliceosome_group",
                         combined = FALSE,
                         title = "HGG, overall survival",
                         p_pos = "topright")
Ignoring unknown labels:
• fill : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
# Write the HGG overall-survival KM plot to the plots directory as a PDF
ggsave(
  filename = file.path(plot_dir, "km_hgg_OS_spliceosome_score.pdf"),
  plot = km_hgg_os_plot,
  device = "pdf",
  width = 9,
  height = 5,
  units = "in"
)
Ignoring unknown labels:
• fill : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Ignoring unknown labels:
• colour : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's colour values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
# KM plot of HGG event-free survival by spliceosome GSVA quartile.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
km_hgg_efs_plot <- plotKM(model = hgg_kap_efs,
                          variable = "spliceosome_group",
                          combined = FALSE,
                          title = "HGG, event-free survival",
                          p_pos = "topright")
Ignoring unknown labels:
• fill : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
# Write the HGG event-free-survival KM plot to the plots directory as a PDF
ggsave(
  filename = file.path(plot_dir, "km_hgg_EFS_spliceosome_score.pdf"),
  plot = km_hgg_efs_plot,
  device = "pdf",
  width = 9,
  height = 5,
  units = "in"
)
Ignoring unknown labels:
• fill : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Ignoring unknown labels:
• colour : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's colour values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Generate coxph models for HGG including covariates
mol_sub_group, age_at_diagnosis_years, and SBI
(cluster is not a term in these models), and plot
# Additive OS Cox model within HGG: molecular subtype, age at diagnosis and
# SBI. Cluster is not one of the terms even though the file name mentions it.
hgg_os_sbi_terms <- "mol_sub_group+age_at_diagnosis_years+SBI"

add_model_hgg_os <- fit_save_model(
  hgg,
  terms = hgg_os_sbi_terms,
  file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_SBI.RDS"),
  "multivariate",
  years_col = "OS_years",
  status_col = "OS_status"
)
Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Loglik converged before variable 9 ; coefficient may be infinite.
# Read the HGG OS (SBI) model from disk and build its forest plot
hgg_os_sbi_fit <- readRDS(
  file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_SBI.RDS")
)
forest_hgg_os <- plotForest(hgg_os_sbi_fit)
`height` was translated to `width`.
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_text()`).
# Show the HGG OS forest plot (SBI model) and save it as a PDF
forest_hgg_os
ggsave(
  filename = file.path(plot_dir, "forest_add_OS_HGG_subtype_cluster_assignment_SBI.pdf"),
  plot = forest_hgg_os,
  device = "pdf",
  width = 9,
  height = 5,
  units = "in"
)
# Additive EFS Cox model within HGG: molecular subtype, age at diagnosis and
# SBI. Cluster is not one of the terms even though the file name mentions it.
hgg_efs_sbi_rds <- file.path(results_dir, "cox_hgg_EFS_additive_terms_subtype_cluster_SBI.RDS")

add_model_hgg_efs <- fit_save_model(
  hgg,
  terms = "mol_sub_group+age_at_diagnosis_years+SBI",
  hgg_efs_sbi_rds,
  "multivariate",
  years_col = "EFS_years",
  status_col = "EFS_status"
)

# Read the model back from the RDS path given above and build its forest plot
forest_hgg_efs <- plotForest(readRDS(hgg_efs_sbi_rds))
`height` was translated to `width`.
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_text()`).
# Write the HGG EFS forest plot (SBI model) to the plots directory as a PDF
ggsave(
  filename = file.path(plot_dir, "forest_add_EFS_HGG_subtype_cluster_assignment_SBI.pdf"),
  plot = forest_hgg_efs,
  device = "pdf",
  width = 9,
  height = 5,
  units = "in"
)
Repeat analysis replacing SBI with
spliceosome_gsva_score
# Additive OS Cox model within HGG with the spliceosome GSVA score in place of
# SBI. Cluster is not one of the terms even though the file name mentions it.
hgg_os_gsva_terms <- "mol_sub_group+age_at_diagnosis_years+spliceosome_gsva_score"

add_model_hgg_os <- fit_save_model(
  hgg,
  terms = hgg_os_gsva_terms,
  file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_spliceosome_score.RDS"),
  "multivariate",
  years_col = "OS_years",
  status_col = "OS_status"
)
Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Loglik converged before variable 9 ; coefficient may be infinite.
# Read the HGG OS (spliceosome score) model from disk and build its forest plot
hgg_os_gsva_fit <- readRDS(
  file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_spliceosome_score.RDS")
)
forest_hgg_os <- plotForest(hgg_os_gsva_fit)
`height` was translated to `width`.
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_text()`).
# Show the HGG OS forest plot (spliceosome score model) and save it as a PDF
forest_hgg_os
ggsave(
  filename = file.path(plot_dir, "forest_add_OS_HGG_subtype_cluster_assignment_spliceosome_score.pdf"),
  plot = forest_hgg_os,
  device = "pdf",
  width = 9,
  height = 5,
  units = "in"
)
# Additive EFS Cox model within HGG with the spliceosome GSVA score in place of
# SBI. Cluster is not one of the terms even though the file name mentions it.
hgg_efs_gsva_rds <- file.path(results_dir, "cox_hgg_EFS_additive_terms_subtype_cluster_spliceosome_score.RDS")

add_model_hgg_efs <- fit_save_model(
  hgg,
  terms = "mol_sub_group+age_at_diagnosis_years+spliceosome_gsva_score",
  hgg_efs_gsva_rds,
  "multivariate",
  years_col = "EFS_years",
  status_col = "EFS_status"
)

# Read the model back from the RDS path given above and build its forest plot
forest_hgg_efs <- plotForest(readRDS(hgg_efs_gsva_rds))
`height` was translated to `width`.
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_text()`).
# Write the HGG EFS forest plot (spliceosome score model) as a PDF
ggsave(
  filename = file.path(plot_dir, "forest_add_EFS_HGG_subtype_cluster_assignment_spliceosome_score.pdf"),
  plot = forest_hgg_efs,
  device = "pdf",
  width = 9,
  height = 5,
  units = "in"
)
Filter for cluster 7
# Cluster 7 samples that have an event-free survival time recorded
cluster7_df <- metadata %>%
  dplyr::filter(cluster == "Cluster 7",
                !is.na(EFS_days))

# Quartile cut-offs within cluster 7, computed once. These are the same values
# that summary() reports as "1st Qu.", "Median" and "3rd Qu.".
c7_sbi_q <- quantile(cluster7_df$SBI, probs = c(0.25, 0.75),
                     na.rm = TRUE, names = FALSE)
c7_gsva_q <- quantile(cluster7_df$spliceosome_gsva_score,
                      probs = c(0.25, 0.5, 0.75),
                      na.rm = TRUE, names = FALSE)

cluster7_df <- cluster7_df %>%
  # SBI: only the top and bottom quartiles are labelled; the middle is NA
  dplyr::mutate(SI_group = case_when(
    SBI > c7_sbi_q[2] ~ "High SBI",
    SBI < c7_sbi_q[1] ~ "Low SBI",
    TRUE ~ NA_character_
  )) %>%
  # Spliceosome GSVA score: one label per quartile. A missing score stays NA
  # instead of falling through to the 1st-quartile label.
  dplyr::mutate(spliceosome_group = case_when(
    is.na(spliceosome_gsva_score) ~ NA_character_,
    spliceosome_gsva_score > c7_gsva_q[3] ~ "Splice GSVA 4th Q",
    spliceosome_gsva_score > c7_gsva_q[2] ~ "Splice GSVA 3rd Q",
    spliceosome_gsva_score > c7_gsva_q[1] ~ "Splice GSVA 2nd Q",
    TRUE ~ "Splice GSVA 1st Q"
  )) %>%
  dplyr::mutate(SI_group = fct_relevel(SI_group,
                                       c("High SBI", "Low SBI"))) %>%
  dplyr::mutate(spliceosome_group = fct_relevel(spliceosome_group,
                                                c("Splice GSVA 1st Q",
                                                  "Splice GSVA 2nd Q",
                                                  "Splice GSVA 3rd Q",
                                                  "Splice GSVA 4th Q")))
Generate KM models with SI_group as covariate
# Kaplan-Meier model of overall survival in cluster 7, High vs Low SBI;
# samples without an SBI group label are removed first
c7_si_os_input <- dplyr::filter(cluster7_df, !is.na(SI_group))

c7_si_kap_os <- survival_analysis(
  metadata = c7_si_os_input,
  ind_var = "SI_group",
  test = "kap.meier",
  metadata_sample_col = "Kids_First_Biospecimen_ID",
  days_col = "OS_days",
  status_col = "OS_status"
)
Testing model: survival::Surv(OS_days, OS_status) ~ SI_group with kap.meier
# Save the cluster 7 overall-survival Kaplan-Meier result (SBI groups)
readr::write_rds(
  x = c7_si_kap_os,
  file = file.path(results_dir, "logrank_cluster7_OS_SBI.RDS")
)
# Kaplan-Meier model of event-free survival in cluster 7, High vs Low SBI;
# samples without an SBI group label are removed first
c7_si_efs_input <- dplyr::filter(cluster7_df, !is.na(SI_group))

c7_si_kap_efs <- survival_analysis(
  metadata = c7_si_efs_input,
  ind_var = "SI_group",
  test = "kap.meier",
  metadata_sample_col = "Kids_First_Biospecimen_ID",
  days_col = "EFS_days",
  status_col = "EFS_status"
)
Testing model: survival::Surv(EFS_days, EFS_status) ~ SI_group with kap.meier
# Save the cluster 7 event-free-survival Kaplan-Meier result (SBI groups)
readr::write_rds(
  x = c7_si_kap_efs,
  file = file.path(results_dir, "logrank_cluster7_EFS_SBI.RDS")
)
Generate Cluster 7 KM SI_group plots
# KM plot of cluster 7 overall survival by SBI group.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
km_c7_si_os_plot <- plotKM(model = c7_si_kap_os,
                           variable = "SI_group",
                           combined = FALSE,
                           title = "Cluster 7, overall survival",
                           p_pos = "topright")
Ignoring unknown labels:
• fill : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
# Write the cluster 7 overall-survival KM plot (SBI groups) as a PDF
ggsave(
  filename = file.path(plot_dir, "km_cluster7_OS_sbi_group.pdf"),
  plot = km_c7_si_os_plot,
  device = "pdf",
  width = 8,
  height = 5,
  units = "in"
)
Ignoring unknown labels:
• fill : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Ignoring unknown labels:
• colour : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's colour values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
# KM plot of cluster 7 event-free survival by SBI group.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
km_c7_si_efs_plot <- plotKM(model = c7_si_kap_efs,
                            variable = "SI_group",
                            combined = FALSE,
                            title = "Cluster 7, event-free survival",
                            p_pos = "topright")
Ignoring unknown labels:
• fill : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
# Write the cluster 7 event-free-survival KM plot (SBI groups) as a PDF
ggsave(
  filename = file.path(plot_dir, "km_cluster7_EFS_sbi_group.pdf"),
  plot = km_c7_si_efs_plot,
  device = "pdf",
  width = 8,
  height = 5,
  units = "in"
)
Ignoring unknown labels:
• fill : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Ignoring unknown labels:
• colour : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's colour values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Generate KM models with spliceosome_group as
covariate
# Kaplan-Meier model of overall survival in cluster 7, comparing only the
# lowest and highest spliceosome GSVA quartiles. The factor is rebuilt with
# exactly those two levels.
c7_splice_extremes <- c("Splice GSVA 1st Q", "Splice GSVA 4th Q")
c7_splice_os_input <- cluster7_df %>%
  dplyr::filter(spliceosome_group %in% c("Splice GSVA 4th Q", "Splice GSVA 1st Q")) %>%
  dplyr::mutate(spliceosome_group = factor(spliceosome_group,
                                           levels = c7_splice_extremes))

c7_splice_kap_os <- survival_analysis(
  metadata = c7_splice_os_input,
  ind_var = "spliceosome_group",
  test = "kap.meier",
  metadata_sample_col = "Kids_First_Biospecimen_ID",
  days_col = "OS_days",
  status_col = "OS_status"
)
Testing model: survival::Surv(OS_days, OS_status) ~ spliceosome_group with kap.meier
# Save the cluster 7 overall-survival Kaplan-Meier result (spliceosome groups)
readr::write_rds(
  x = c7_splice_kap_os,
  file = file.path(results_dir, "logrank_cluster7_OS_splice_group.RDS")
)
# Kaplan-Meier model of event-free survival in cluster 7, comparing only the
# lowest and highest spliceosome GSVA quartiles. The factor is rebuilt with
# exactly those two levels.
c7_splice_extreme_levels <- c("Splice GSVA 1st Q", "Splice GSVA 4th Q")
c7_splice_efs_input <- cluster7_df %>%
  dplyr::filter(spliceosome_group %in% c("Splice GSVA 4th Q", "Splice GSVA 1st Q")) %>%
  dplyr::mutate(spliceosome_group = factor(spliceosome_group,
                                           levels = c7_splice_extreme_levels))

c7_splice_kap_efs <- survival_analysis(
  metadata = c7_splice_efs_input,
  ind_var = "spliceosome_group",
  test = "kap.meier",
  metadata_sample_col = "Kids_First_Biospecimen_ID",
  days_col = "EFS_days",
  status_col = "EFS_status"
)
Testing model: survival::Surv(EFS_days, EFS_status) ~ spliceosome_group with kap.meier
# Save the cluster 7 event-free-survival Kaplan-Meier result (spliceosome groups)
readr::write_rds(
  x = c7_splice_kap_efs,
  file = file.path(results_dir, "logrank_cluster7_EFS_splice_group.RDS")
)
Generate Cluster 7 KM spliceosome_group plots
# KM plot of cluster 7 overall survival by spliceosome GSVA group.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
km_c7_splice_os_plot <- plotKM(model = c7_splice_kap_os,
                               variable = "spliceosome_group",
                               combined = FALSE,
                               title = "Cluster 7, overall survival",
                               p_pos = "topright")
Ignoring unknown labels:
• fill : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
# Write the cluster 7 overall-survival KM plot (spliceosome groups) as a PDF
ggsave(
  filename = file.path(plot_dir, "km_cluster7_OS_splice_group.pdf"),
  plot = km_c7_splice_os_plot,
  device = "pdf",
  width = 9,
  height = 5,
  units = "in"
)
Ignoring unknown labels:
• fill : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Ignoring unknown labels:
• colour : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's colour values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
# KM plot of cluster 7 event-free survival by spliceosome GSVA group.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
km_c7_splice_efs_plot <- plotKM(model = c7_splice_kap_efs,
                                variable = "spliceosome_group",
                                combined = FALSE,
                                title = "Cluster 7, event-free survival",
                                p_pos = "topright")
Ignoring unknown labels:
• fill : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
# Write the cluster 7 event-free-survival KM plot (spliceosome groups) as a PDF
ggsave(
  filename = file.path(plot_dir, "km_cluster7_EFS_splice_group.pdf"),
  plot = km_c7_splice_efs_plot,
  device = "pdf",
  width = 9,
  height = 5,
  units = "in"
)
Ignoring unknown labels:
• fill : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Ignoring unknown labels:
• colour : ""
Warning: No shared levels found between `names(values)` of the manual scale and the data's colour values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Assess EFS and OS by SBI or spliceosome GSVA score in multivariate models and generate forest plots
# Additive EFS Cox model within cluster 7, comparing the lowest and highest
# spliceosome GSVA quartiles while adjusting for resection, age and histology.
#
# The model is written to its own cluster-7 file. The earlier path,
# "cox_hgg_EFS_additive_terms_subtype_cluster_spliceosome_score.RDS", is the
# same file the HGG EFS spliceosome-score model is saved to, so saving here
# under that name overwrote the HGG result.
#
# NOTE(review): this filter removes only "Unavailable" and uses `!=`, which
# also drops rows where extent_of_tumor_resection is NA. The other models in
# this script use `!... %in% c("Not Reported", "Unavailable")`. Confirm
# whether the difference is intended.
c7_efs_rds <- file.path(results_dir, "cox_cluster7_EFS_additive_terms_resection_histology_spliceosome_group.RDS")
add_model_c7_efs <- fit_save_model(cluster7_df %>%
                                     dplyr::filter(extent_of_tumor_resection != "Unavailable",
                                                   spliceosome_group %in% c("Splice GSVA 4th Q", "Splice GSVA 1st Q")) %>%
                                     dplyr::mutate(plot_group = fct_relevel(plot_group, "Low-grade glioma", after = 0)),
                                   terms = "extent_of_tumor_resection+age_at_diagnosis_years+plot_group+spliceosome_group",
                                   c7_efs_rds,
                                   "multivariate",
                                   years_col = "EFS_years",
                                   status_col = "EFS_status")
Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Loglik converged before variable 6 ; coefficient may be infinite.
# Read the cluster 7 model back from the path it was just written to
forest_c7_spliceosome_efs <- plotForest(readRDS(c7_efs_rds))
`height` was translated to `width`.
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
# Write the cluster 7 EFS forest plot (spliceosome groups) as a PDF
ggsave(
  filename = file.path(plot_dir, "forest_add_EFS_cluster7_histology_resection_spliceosome_group.pdf"),
  plot = forest_c7_spliceosome_efs,
  device = "pdf",
  width = 9,
  height = 4,
  units = "in"
)
# Additive OS Cox model within cluster 7 with continuous SBI, adjusting for
# resection, age and histology. "Low-grade glioma" is moved to the first
# level of plot_group.
# NOTE(review): the RDS name says "hgg" and "si_group", but the data are
# cluster 7 samples and the term is continuous SBI.
c7_os_input <- cluster7_df %>%
  dplyr::filter(!extent_of_tumor_resection %in% c("Not Reported", "Unavailable")) %>%
  dplyr::mutate(plot_group = fct_relevel(plot_group, "Low-grade glioma", after = 0))

add_model_c7_os <- fit_save_model(
  c7_os_input,
  terms = "extent_of_tumor_resection+age_at_diagnosis_years+plot_group+SBI",
  file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_si_group.RDS"),
  "multivariate",
  years_col = "OS_years",
  status_col = "OS_status"
)
Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Loglik converged before variable 4,5,8,9 ; coefficient may be infinite.
# Read the cluster 7 OS (SBI) model from disk and build its forest plot
c7_os_sbi_fit <- readRDS(
  file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_si_group.RDS")
)
forest_c7_si_os <- plotForest(c7_os_sbi_fit)
Warning in scale_x_log10(labels = function(x) format(x, scientific = FALSE)) :
log-10 transformation introduced infinite values.
`height` was translated to `width`.
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_text()`).
# Write the cluster 7 OS forest plot (SBI model) as a PDF
ggsave(
  filename = file.path(plot_dir, "forest_add_OS_cluster7_histology_resection_si.pdf"),
  plot = forest_c7_si_os,
  device = "pdf",
  width = 9,
  height = 4,
  units = "in"
)
Print session info
# Record the R version and package versions used for this run
utils::sessionInfo()
R version 4.4.0 (2024-04-24)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.4 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8
[6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
time zone: Etc/UTC
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] gtools_3.9.5 survminer_0.4.9 patchwork_1.2.0 ggpubr_0.6.0 survival_3.7-0 lubridate_1.9.4 forcats_1.0.1 stringr_1.6.0
[9] dplyr_1.1.4 purrr_1.2.0 readr_2.1.6 tidyr_1.3.1 tibble_3.3.0 ggplot2_4.0.1 tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] gtable_0.3.6 xfun_0.54 bslib_0.9.0 rstatix_0.7.2 lattice_0.22-7 tzdb_0.5.0 vctrs_0.6.5
[8] tools_4.4.0 generics_0.1.4 parallel_4.4.0 pkgconfig_2.0.3 Matrix_1.7-4 data.table_1.17.8 RColorBrewer_1.1-3
[15] S7_0.2.1 lifecycle_1.0.4 compiler_4.4.0 farver_2.1.2 textshaping_1.0.4 carData_3.0-5 colorblindr_0.1.0
[22] htmltools_0.5.8.1 sass_0.4.10 yaml_2.3.10 crayon_1.5.3 pillar_1.11.1 car_3.1-2 jquerylib_0.1.4
[29] cachem_1.1.0 abind_1.4-5 km.ci_0.5-6 commonmark_2.0.0 tidyselect_1.2.1 digest_0.6.39 stringi_1.8.7
[36] labeling_0.4.3 splines_4.4.0 cowplot_1.1.3 rprojroot_2.1.1 fastmap_1.2.0 grid_4.4.0 colorspace_2.1-2
[43] cli_3.6.5 magrittr_2.0.4 broom_1.0.10 withr_3.0.2 scales_1.4.0 backports_1.5.0 bit64_4.6.0-1
[50] timechange_0.3.0 rmarkdown_2.30 ggtext_0.1.2 bit_4.6.0 gridExtra_2.3 ggsignif_0.6.4 ragg_1.5.0
[57] zoo_1.8-12 hms_1.1.4 evaluate_1.0.5 knitr_1.50 KMsurv_0.1-5 markdown_1.13 survMisc_0.5.6
[64] rlang_1.1.6 Rcpp_1.1.0 gridtext_0.1.5 xtable_1.8-4 glue_1.8.0 xml2_1.5.0 vroom_1.6.6
[71] rstudioapi_0.17.1 jsonlite_2.0.0 R6_2.6.1 systemfonts_1.3.1